https://guangchuangyu.github.io/2016/01/go-analysis-using-clusterprofiler/
# Load library and search for the organism
library(clusterProfiler)
library(pathview)
library(tidyverse)
library(enrichplot)
Build the Salmon Data Base and import the gene universe (background genes from Salmon genome)
library(Category)
library(AnnotationHub)
hub <- AnnotationHub()
query(hub, c("salmo salar","orgdb"))
## AnnotationHub with 1 record
## # snapshotDate(): 2018-04-30
## # names(): AH61820
## # $dataprovider: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/
## # $species: Salmo salar
## # $rdataclass: OrgDb
## # $rdatadateadded: 2018-04-20
## # $title: org.Salmo_salar.eg.sqlite
## # $description: NCBI gene ID based annotations about Salmo salar
## # $taxonomyid: 8030
## # $genome: NCBI genomes
## # $sourcetype: NCBI/UniProt
## # $sourceurl: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/, ftp://ftp.uniprot....
## # $sourcesize: NA
## # $tags: c("NCBI", "Gene", "Annotation")
## # retrieve record with 'object[["AH61820"]]'
salmodb <- hub[["AH61820"]]
DatPkgFactory(salmodb)
## An object of class "Org.XX.egDatPkg"
## Slot "name":
## [1] "OrgDb for Salmo salar"
##
## Slot "db":
## OrgDb object:
## | DBSCHEMAVERSION: 2.1
## | DBSCHEMA: NOSCHEMA_DB
## | ORGANISM: Salmo salar
## | SPECIES: Salmo salar
## | CENTRALID: GID
## | Taxonomy ID: 8030
## | Db type: OrgDb
## | Supporting package: AnnotationDbi
##
## Slot "installed":
## [1] FALSE
columns(salmodb)
## [1] "ACCNUM" "ALIAS" "CHR" "ENTREZID" "EVIDENCE"
## [6] "EVIDENCEALL" "GENENAME" "GID" "GO" "GOALL"
## [11] "ONTOLOGY" "ONTOLOGYALL" "PMID" "REFSEQ" "SYMBOL"
## [16] "UNIGENE"
gene_universe <- read.csv('Results/gene_universe.csv')
The following code loads ALL differentially expressed genes with an adjusted p-value < 0.05, and then selects those with an absolute logfold change of 2.
POMV6 <- read.csv('Results/ControlvsPOMV6_ALL.csv') # CHECK FOR NAs
POMV6_SigGeneList <- POMV6[abs(POMV6$logFC) >= 2,]
rownames(POMV6_SigGeneList) <- NULL
POMV6_SigGeneList_logFC <- POMV6_SigGeneList$logFC
names(POMV6_SigGeneList_logFC) <- POMV6_SigGeneList$ENTREZID
ego_POMV6 <- enrichGO(gene = names(POMV6_SigGeneList_logFC),
universe = as.character(gene_universe$ENTREZID),
keyType = 'ENTREZID',
OrgDb = salmodb,
ont = "BP",
pAdjustMethod = "fdr",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
head(summary(ego_POMV6))
## ID Description GeneRatio
## GO:0006952 GO:0006952 defense response 14/69
## GO:0002376 GO:0002376 immune system process 17/69
## GO:0009615 GO:0009615 response to virus 7/69
## GO:0006955 GO:0006955 immune response 13/69
## GO:0051607 GO:0051607 defense response to virus 6/69
## GO:0043207 GO:0043207 response to external biotic stimulus 7/69
## BgRatio pvalue p.adjust qvalue
## GO:0006952 132/10679 8.378159e-14 2.211834e-11 2.010758e-11
## GO:0002376 277/10679 1.186589e-12 1.566297e-10 1.423906e-10
## GO:0009615 29/10679 4.799528e-10 4.223585e-08 3.839623e-08
## GO:0006955 221/10679 1.247334e-09 8.232402e-08 7.484002e-08
## GO:0051607 21/10679 2.930697e-09 1.547408e-07 1.406734e-07
## GO:0043207 42/10679 7.764224e-09 2.928222e-07 2.662020e-07
## geneID
## GO:0006952 100195148/100196303/100136541/100136587/106566099/106570878/106570889/106591927/106596334/106600865/100194889/100136920/100196194/106602560
## GO:0002376 100195148/100196303/100136541/100136587/106565339/106565855/106566099/106570878/106570889/106591927/106596334/106600865/100194889/100136920/100196194/106602560/106586517
## GO:0009615 100196303/100136541/106566099/106596334/106600865/100194889/106602560
## GO:0006955 100195148/100196303/100136541/100136587/106565339/106565855/106570878/106570889/106591927/100136920/100196194/106602560/106586517
## GO:0051607 100136541/106566099/106596334/106600865/100194889/106602560
## GO:0043207 100196303/100136541/106566099/106596334/106600865/100194889/106602560
## Count
## GO:0006952 14
## GO:0002376 17
## GO:0009615 7
## GO:0006955 13
## GO:0051607 6
## GO:0043207 7
barplot(ego_POMV6, showCategory=20, font.size = 10)
dotplot(ego_POMV6, showCategory=20, font.size = 10)
emapplot(ego_POMV6)
cnetplot(ego_POMV6, categorySize="genNUM", foldChange=POMV6_SigGeneList_logFC)
The following code loads ALL differentially expressed genes with an adjusted p-value < 0.05, and then selects those with an absolute logfold change of 2.
POMV24 <- read.csv('Results/ControlvsPOMV24_ALL.csv') # CHECK FOR NAs
POMV24_SigGeneList <- POMV24[abs(POMV24$logFC) >= 2,]
rownames(POMV24_SigGeneList) <- NULL
POMV24_SigGeneList_logFC <- POMV24_SigGeneList$logFC
names(POMV24_SigGeneList_logFC) <- POMV24_SigGeneList$ENTREZID
ego_POMV24 <- enrichGO(gene = names(POMV24_SigGeneList_logFC),
universe = as.character(gene_universe$ENTREZID),
keyType = 'ENTREZID',
OrgDb = salmodb,
ont = "BP",
pAdjustMethod = "fdr",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
head(summary(ego_POMV24))
## ID Description GeneRatio
## GO:0006952 GO:0006952 defense response 55/1356
## GO:0002376 GO:0002376 immune system process 84/1356
## GO:0006955 GO:0006955 immune response 70/1356
## GO:0051607 GO:0051607 defense response to virus 17/1356
## GO:0007264 GO:0007264 small GTPase mediated signal transduction 96/1356
## GO:0098542 GO:0098542 defense response to other organism 20/1356
## BgRatio pvalue p.adjust qvalue
## GO:0006952 132/10679 5.941100e-17 6.951087e-14 6.666539e-14
## GO:0002376 277/10679 3.847759e-15 2.250939e-12 2.158795e-12
## GO:0006955 221/10679 7.496142e-14 2.923496e-11 2.803820e-11
## GO:0051607 21/10679 1.918880e-12 5.612724e-10 5.382964e-10
## GO:0007264 394/10679 8.178118e-11 1.913680e-08 1.835342e-08
## GO:0098542 33/10679 1.155639e-10 2.253497e-08 2.161248e-08
## geneID
## GO:0006952 100196184/106601862/100195999/100195148/100137019/100136436/100195681/100136439/100136449/100136453/100136456/100136541/100136587/100302030/101448043/106561958/106565979/106566099/106569524/106570878/106570889/106574371/106575194/106576643/106576647/106577834/106577950/106583498/106589772/106590949/106591927/106593218/106595807/106596088/106596190/106596334/106599427/106600865/106600963/106600969/106600975/106601371/106601388/106604275/106606549/106607463/106609132/106609858/106611470/100194889/100136920/100196194/100270812/106602560/106611198
## GO:0002376 100196184/106601862/100195999/100195148/100329176/100137019/100136436/100195681/100136548/100846970/106584118/100136449/100136541/100136587/100302030/101448043/106563358/106564720/106565339/106565694/106565799/106565855/106565979/106566099/106567717/106569524/106570878/106570886/106570889/106572809/106574371/106574975/106575194/106575840/106575849/106576643/106576647/106577010/106577763/106577765/106577834/106577950/106579494/106581031/106585878/106586893/106588814/106589772/106590519/106590949/106591927/106593218/106595807/106596088/106596334/106599427/106600865/106600963/106600975/106601371/106601388/106602980/106604275/106605663/106606549/106606848/106607463/106607531/106608811/106609048/106609132/106609531/106609858/106611470/106613961/100194889/100136920/100196194/100270812/106602560/100136458/106586517/100196575/106611198
## GO:0006955 100196184/106601862/100195999/100195148/100137019/100136436/100195681/100136548/100846970/106584118/100136449/100136541/100136587/100302030/106563358/106564720/106565339/106565694/106565799/106565855/106565979/106567717/106569524/106570878/106570886/106570889/106572809/106574371/106574975/106575194/106575840/106575849/106576643/106576647/106577010/106577765/106577834/106577950/106579494/106581031/106585878/106586893/106589772/106590519/106591927/106593218/106595807/106596088/106599427/106600975/106601371/106601388/106602980/106604275/106605663/106606549/106606848/106607531/106608811/106609048/106609858/106613961/100136920/100196194/100270812/106602560/100136458/106586517/100196575/106611198
## GO:0051607 100137019/100136436/100136541/100302030/101448043/106566099/106590949/106596334/106600865/106600963/106600975/106607463/106609132/106611470/100194889/106602560/106611198
## GO:0007264 106574446/106610330/100380466/106613922/100196534/106560488/106560490/106561157/106562829/106562943/106563223/106565273/106565544/106565828/106566806/106566812/106566904/106567797/106568132/106568270/106568392/106569172/106570905/106571247/106571902/106572387/106576073/106576227/106576952/106577639/106577853/106577869/106578971/106579157/106579284/106579856/106579924/106579959/106580138/106580155/106580158/106580556/106581424/106581831/106582025/106583366/106584026/106584283/106584386/106584876/106585456/106585561/106585588/106586158/106586233/106588489/106588797/106588976/106589836/106591236/106591860/106591994/106592352/106594155/106594609/106599451/106600087/106600822/106601059/106601313/106603123/106603740/106604741/106605732/106606050/106607078/106607387/106607499/106608153/106608271/106608981/106609016/106609220/106609966/106611722/106611922/106612426/106613940/106581351/106601189/106610804/106613823/106564550/106600638/106611626/106607670
## GO:0098542 106601862/100137019/100136436/100136439/100136453/100136541/100302030/101448043/106566099/106590949/106596334/106600865/106600963/106600975/106607463/106609132/106611470/100194889/106602560/106611198
## Count
## GO:0006952 55
## GO:0002376 84
## GO:0006955 70
## GO:0051607 17
## GO:0007264 96
## GO:0098542 20
barplot(ego_POMV24, showCategory=20, font.size = 10)
dotplot(ego_POMV24, showCategory=20, font.size = 10)
emapplot(ego_POMV24, showCategory = 20)
cnetplot(ego_POMV24, categorySize="genNUM", foldChange=POMV24_SigGeneList_logFC, node_label = FALSE)
The following code loads ALL differentially expressed genes with an adjusted p-value < 0.05, and then selects those with an absolute logfold change of 2.
ISAV6 <- read.csv('Results/ControlvsISAV6_ALL.csv') # CHECK FOR NAs
ISAV6_SigGeneList <- ISAV6[abs(ISAV6$logFC) >= 2,]
rownames(ISAV6_SigGeneList) <- NULL
ISAV6_SigGeneList_logFC <- ISAV6_SigGeneList$logFC
names(ISAV6_SigGeneList_logFC) <- ISAV6_SigGeneList$ENTREZID
ego_ISAV6 <- enrichGO(gene = names(ISAV6_SigGeneList_logFC),
universe = as.character(gene_universe$ENTREZID),
keyType = 'ENTREZID',
OrgDb = salmodb,
ont = "BP",
pAdjustMethod = "fdr",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
head(summary(ego_ISAV6))
## ID Description GeneRatio BgRatio
## GO:0002376 GO:0002376 immune system process 27/85 277/10679
## GO:0006955 GO:0006955 immune response 24/85 221/10679
## GO:0006952 GO:0006952 defense response 18/85 132/10679
## GO:0045087 GO:0045087 innate immune response 11/85 71/10679
## GO:0034097 GO:0034097 response to cytokine 8/85 37/10679
## GO:0050776 GO:0050776 regulation of immune response 9/85 70/10679
## pvalue p.adjust qvalue
## GO:0002376 1.219522e-22 3.243928e-20 2.323510e-20
## GO:0006955 3.202610e-21 4.259471e-19 3.050907e-19
## GO:0006952 8.325678e-18 7.382101e-16 5.287536e-16
## GO:0045087 7.253815e-12 4.823787e-10 3.455106e-10
## GO:0034097 3.684795e-10 1.960311e-08 1.404101e-08
## GO:0050776 3.645326e-09 1.616095e-07 1.157551e-07
## geneID
## GO:0002376 100195999/100195148/100196303/100136548/100846970/100136541/100136587/106563358/106564720/106565339/106565855/106566099/106570878/106570889/106574975/106575849/106585878/106591927/106593218/106596088/106599427/106600865/100194889/100136920/100196194/100270812/106602560
## GO:0006955 100195999/100195148/100196303/100136548/100846970/100136541/100136587/106563358/106564720/106565339/106565855/106570878/106570889/106574975/106575849/106585878/106591927/106593218/106596088/106599427/100136920/100196194/100270812/106602560
## GO:0006952 100195999/100195148/100196303/100136541/100136587/106566099/106570878/106570889/106591927/106593218/106596088/106599427/106600865/100194889/100136920/100196194/100270812/106602560
## GO:0045087 100195148/100196303/100136541/100136587/106591927/106593218/106596088/106599427/100136920/100270812/106602560
## GO:0034097 100136541/100136587/106591927/106593218/106596088/106599427/100136920/100270812
## GO:0050776 100136548/100846970/106574975/106575849/106591927/106593218/106596088/106599427/106602560
## Count
## GO:0002376 27
## GO:0006955 24
## GO:0006952 18
## GO:0045087 11
## GO:0034097 8
## GO:0050776 9
barplot(ego_ISAV6, showCategory=20, font.size = 10)
dotplot(ego_ISAV6, showCategory=20, font.size = 10)
emapplot(ego_ISAV6, font.size = 10)
cnetplot(ego_ISAV6, categorySize="genNUM", foldChange=POMV6_SigGeneList_logFC, font.size = 10)
The following code loads ALL differentially expressed genes with an adjusted p-value < 0.05, and then selects those with an absolute logfold change of 2.
ISAV24 <- read.csv('Results/ControlvsISAV24_ALL.csv') # CHECK FOR NAs
ISAV24_SigGeneList <- ISAV24[abs(ISAV24$logFC) >= 2,]
rownames(ISAV24_SigGeneList) <- NULL
ISAV24_SigGeneList_logFC <- ISAV24_SigGeneList$logFC
names(ISAV24_SigGeneList_logFC) <- ISAV24_SigGeneList$ENTREZID
ego_ISAV24 <- enrichGO(gene = names(ISAV24_SigGeneList_logFC),
universe = as.character(gene_universe$ENTREZID),
keyType = 'ENTREZID',
OrgDb = salmodb,
ont = "BP",
pAdjustMethod = "fdr",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
head(summary(ego_ISAV6))
## ID Description GeneRatio BgRatio
## GO:0002376 GO:0002376 immune system process 27/85 277/10679
## GO:0006955 GO:0006955 immune response 24/85 221/10679
## GO:0006952 GO:0006952 defense response 18/85 132/10679
## GO:0045087 GO:0045087 innate immune response 11/85 71/10679
## GO:0034097 GO:0034097 response to cytokine 8/85 37/10679
## GO:0050776 GO:0050776 regulation of immune response 9/85 70/10679
## pvalue p.adjust qvalue
## GO:0002376 1.219522e-22 3.243928e-20 2.323510e-20
## GO:0006955 3.202610e-21 4.259471e-19 3.050907e-19
## GO:0006952 8.325678e-18 7.382101e-16 5.287536e-16
## GO:0045087 7.253815e-12 4.823787e-10 3.455106e-10
## GO:0034097 3.684795e-10 1.960311e-08 1.404101e-08
## GO:0050776 3.645326e-09 1.616095e-07 1.157551e-07
## geneID
## GO:0002376 100195999/100195148/100196303/100136548/100846970/100136541/100136587/106563358/106564720/106565339/106565855/106566099/106570878/106570889/106574975/106575849/106585878/106591927/106593218/106596088/106599427/106600865/100194889/100136920/100196194/100270812/106602560
## GO:0006955 100195999/100195148/100196303/100136548/100846970/100136541/100136587/106563358/106564720/106565339/106565855/106570878/106570889/106574975/106575849/106585878/106591927/106593218/106596088/106599427/100136920/100196194/100270812/106602560
## GO:0006952 100195999/100195148/100196303/100136541/100136587/106566099/106570878/106570889/106591927/106593218/106596088/106599427/106600865/100194889/100136920/100196194/100270812/106602560
## GO:0045087 100195148/100196303/100136541/100136587/106591927/106593218/106596088/106599427/100136920/100270812/106602560
## GO:0034097 100136541/100136587/106591927/106593218/106596088/106599427/100136920/100270812
## GO:0050776 100136548/100846970/106574975/106575849/106591927/106593218/106596088/106599427/106602560
## Count
## GO:0002376 27
## GO:0006955 24
## GO:0006952 18
## GO:0045087 11
## GO:0034097 8
## GO:0050776 9
barplot(ego_ISAV24, showCategory=20, font.size = 10)
dotplot(ego_ISAV24, showCategory=20, font.size = 10)
emapplot(ego_ISAV24, font.size = 10)
cnetplot(ego_ISAV24, categorySize="genNUM", foldChange=POMV6_SigGeneList_logFC, font.size = 10)